#' =============================================================================
#' FILE: 00_latinobarometro_data.R
#' DESCRIPTION:
#'   Processes Latinobarometro survey data (2002–2020) to analyze trust in
#'   political parties in Latin America. Imports, cleans, and merges country-level
#'   datasets for each survey year, standardizes variable names/values, and
#'   produces a harmonized dataset for analysis.
#'
#' NOTE:
#'   Download the required `.dta` files for each year/country and place them in
#'   the corresponding folders. For more information, see readme_latinobarometro_data.
#'   Data source: https://www.latinobarometro.org/lat.jsp
#'   For replication, use the harmonized file generated at the end of this script.
#'
#' PACKAGES REQUIRED: pacman, tidyverse, haven
#'
#' OUTPUTS:
#'   - 04_outputs/latinobarometro_trust.rds
#' =============================================================================

# Load Required Packages and data ----------------------------------------------

# Install and load required packages
if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse,
               haven)

# Read the raw survey files, one object per survey wave (dfYY = wave 20YY).

# 2002-2010 waves: Stata files from the 2014-06-27 re-release
df02 <- read_dta("02_data/latinobarometro/Latinobarometro_2002_datos_eng_v2014_06_27.dta")
df03 <- read_dta("02_data/latinobarometro/Latinobarometro_2003_datos_eng_v2014_06_27.dta")
df04 <- read_dta("02_data/latinobarometro/Latinobarometro_2004_datos_eng_v2014_06_27.dta")
df05 <- read_dta("02_data/latinobarometro/Latinobarometro_2005_datos_eng_v2014_06_27.dta")
df07 <- read_dta("02_data/latinobarometro/Latinobarometro_2007_datos_eng_v2014_06_27.dta")
df08 <- read_dta("02_data/latinobarometro/Latinobarometro_2008_datos_eng_v2014_06_27.dta")
df09 <- read_dta("02_data/latinobarometro/Latinobarometro_2009_datos_eng_v2014_06_27.dta")
df10 <- read_dta("02_data/latinobarometro/Latinobarometro_2010_datos_eng_v2014_06_27.dta")

# 2011-2017 waves: Stata files with wave-specific file names
df11 <- read_dta("02_data/latinobarometro/Latinobarometro_2011_eng.dta")
df13 <- read_dta("02_data/latinobarometro/Latinobarometro2013Eng.dta")
df15 <- read_dta("02_data/latinobarometro/Latinobarometro_2015_Eng.dta")
df16 <- read_dta("02_data/latinobarometro/Latinobarometro2016Eng_v20170205.dta")
df17 <- read_dta("02_data/latinobarometro/Latinobarometro2017Eng_v20180117.dta")

# 2018 wave: serialized R object
df18 <- readRDS("02_data/latinobarometro/Latinobarometro_2018_Esp_R_v20190303.rds")

# 2020 wave: load() restores the objects stored in the .rdata file into the
# current environment; the `Latinobarometro_2020_Eng` object used below is
# expected to come from that file and is copied to the common dfYY name.
load("02_data/latinobarometro/Latinobarometro_2020_Eng_Rdata_v1_0.rdata")
df20 <- Latinobarometro_2020_Eng

# Country names and years ------------------------------------------------------
#' Add a harmonized character `country` column to a survey data frame.
#'
#' The country identifier column is converted to its factor representation
#' with `as_factor()` and then to character. Optionally, selected names are
#' replaced through `recode()`.
#'
#' @param df A data frame (or tibble) holding one survey wave.
#' @param id_var Name of the column that identifies the country.
#' @param recode_vec Optional named character vector of the form
#'   `c("old name" = "new name")`. `NULL` or an empty vector skips recoding.
#' @return `df` with an added (or overwritten) `country` column.
standardize_country <- function(df, id_var = "idenpa", recode_vec = NULL) {
  ids <- df[[id_var]]
  if (is.null(ids)) {
    # Fail with a clear message instead of an opaque as_factor() dispatch error.
    stop("Column '", id_var, "' not found in data frame.", call. = FALSE)
  }

  country <- as.character(as_factor(ids))

  # recode() aborts when it receives no replacements, so an empty vector is
  # treated the same way as NULL.
  if (length(recode_vec) > 0) {
    country <- recode(country, !!!recode_vec)
  }

  df$country <- country
  df
}

# Named vector for recoding: every known spelling of the Dominican Republic
# (names) is mapped to one harmonized label (values).
recode_domrep <- c(
  "Dominican Rep." = "Dominican Republic",
  # Spanish label as it appears when UTF-8 bytes were decoded as Latin-1.
  "RepÃºblica Dominicana" = "Dominican Republic",
  # Same Spanish label when decoded correctly; written with a Unicode escape
  # so the match does not depend on the encoding of this script file.
  "Rep\u00fablica Dominicana" = "Dominican Republic"
)

# Add the harmonized `country` column to every 2002-2017 wave, using the
# default id column of standardize_country() and the Dominican Republic recode.
df02 <- df02 %>% standardize_country(recode_vec = recode_domrep)
df03 <- df03 %>% standardize_country(recode_vec = recode_domrep)
df04 <- df04 %>% standardize_country(recode_vec = recode_domrep)
df05 <- df05 %>% standardize_country(recode_vec = recode_domrep)
df07 <- df07 %>% standardize_country(recode_vec = recode_domrep)
df08 <- df08 %>% standardize_country(recode_vec = recode_domrep)
df09 <- df09 %>% standardize_country(recode_vec = recode_domrep)
df10 <- df10 %>% standardize_country(recode_vec = recode_domrep)
df11 <- df11 %>% standardize_country(recode_vec = recode_domrep)
df13 <- df13 %>% standardize_country(recode_vec = recode_domrep)
df15 <- df15 %>% standardize_country(recode_vec = recode_domrep)
df16 <- df16 %>% standardize_country(recode_vec = recode_domrep)
df17 <- df17 %>% standardize_country(recode_vec = recode_domrep)

# The 2018 and 2020 waves are recoded from the country code itself rather than
# from value labels: the code is converted to character and looked up in the
# table below (code -> country name).
# NOTE(review): codes look like ISO 3166-1 numeric country codes — confirm.
country_codes <- c(
  "32"  = "Argentina",
  "68"  = "Bolivia",
  "76"  = "Brazil",
  "152" = "Chile",
  "170" = "Colombia",
  "188" = "Costa Rica",
  "214" = "Dominican Republic",
  "218" = "Ecuador",
  "222" = "El Salvador",
  "320" = "Guatemala",
  "340" = "Honduras",
  "484" = "Mexico",
  "558" = "Nicaragua",
  "591" = "Panama",
  "600" = "Paraguay",
  "604" = "Peru",
  "724" = "Spain",
  "858" = "Uruguay",
  "862" = "Venezuela"
)

# The id column is upper-case in the 2018 file and lower-case in the 2020 file.
df18$country <- recode(as.character(df18$IDENPA), !!!country_codes)
df20$country <- recode(as.character(df20$idenpa), !!!country_codes)

# Recode trust -----------------------------------------------------------------

#' Rescale a 1-4 trust item to the unit interval and store it as `trust`.
#'
#' The rescaling is `(4 - x) / 3`, so code 1 becomes 1, code 2 becomes 2/3,
#' code 3 becomes 1/3 and code 4 becomes 0. Every code outside the 1-4 range
#' (non-positive missing-value codes, and any code above 4) is set to `NA` so
#' the result always lies in [0, 1].
#'
#' @param df A data frame (or tibble) holding one survey wave.
#' @param varname Name of the column that holds the raw trust item.
#' @return `df` with an added (or overwritten) numeric `trust` column.
recode_trust <- function(df, varname) {
  codes <- df[[varname]]
  if (is.null(codes)) {
    # Fail with a clear message instead of a length-mismatch error on assign.
    stop("Column '", varname, "' not found in data frame.", call. = FALSE)
  }

  # Only codes on the substantive scale are kept; without the upper bound a
  # code such as 8 or 9 would turn into a negative trust value.
  in_scale <- codes > 0 & codes <= 4

  df$trust <- (4 - ifelse(in_scale, codes, NA)) / 3
  df
}

# The trust item has a different column name in every wave; each call below
# writes the rescaled item into a common `trust` column.
df02 <- recode_trust(df02, varname = "p34stf")       # 2002
df03 <- recode_trust(df03, varname = "p21std")       # 2003
df04 <- recode_trust(df04, varname = "p34std")       # 2004
df05 <- recode_trust(df05, varname = "p47stb")       # 2005
df07 <- recode_trust(df07, varname = "p27st_e")      # 2007
df08 <- recode_trust(df08, varname = "p28st_c")      # 2008
df09 <- recode_trust(df09, varname = "p26st_c")      # 2009
df10 <- recode_trust(df10, varname = "P20ST_C")      # 2010
df11 <- recode_trust(df11, varname = "P22ST_C")      # 2011
df13 <- recode_trust(df13, varname = "P26TGB_G")     # 2013
df15 <- recode_trust(df15, varname = "P19ST_C")      # 2015
df16 <- recode_trust(df16, varname = "P13STG")       # 2016
df17 <- recode_trust(df17, varname = "P14ST_G")      # 2017
df18 <- recode_trust(df18, varname = "P15STGBSC.G")  # 2018
df20 <- recode_trust(df20, varname = "p13st.g")      # 2020

# Create dataset with only relevant variables ----------------------------------

# Stack all waves into one long data frame, keeping only the harmonized
# columns: `country`, `trust`, and the survey `year` added here.
df <- bind_rows(
  df02 %>% mutate(year = 2002) %>% select(country, trust, year),
  df03 %>% mutate(year = 2003) %>% select(country, trust, year),
  df04 %>% mutate(year = 2004) %>% select(country, trust, year),
  df05 %>% mutate(year = 2005) %>% select(country, trust, year),
  df07 %>% mutate(year = 2007) %>% select(country, trust, year),
  df08 %>% mutate(year = 2008) %>% select(country, trust, year),
  df09 %>% mutate(year = 2009) %>% select(country, trust, year),
  df10 %>% mutate(year = 2010) %>% select(country, trust, year),
  df11 %>% mutate(year = 2011) %>% select(country, trust, year),
  df13 %>% mutate(year = 2013) %>% select(country, trust, year),
  df15 %>% mutate(year = 2015) %>% select(country, trust, year),
  df16 %>% mutate(year = 2016) %>% select(country, trust, year),
  df17 %>% mutate(year = 2017) %>% select(country, trust, year),
  df18 %>% mutate(year = 2018) %>% select(country, trust, year),
  df20 %>% mutate(year = 2020) %>% select(country, trust, year)
)

# saveRDS() does not create missing directories; make sure the output folder
# exists so the script does not fail after all the processing is done.
dir.create("04_outputs", showWarnings = FALSE, recursive = TRUE)
saveRDS(df, "04_outputs/latinobarometro_trust.rds")